THIS IS MY NEW TEST Load packages + make it so table always displays NA’s

suppressMessages(silent <- lapply(
    c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr"), 
    library, character.only=T))
table = function (..., useNA = 'always') base::table(..., useNA = useNA)

Load UK Biobank dataset

bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukb34137.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukb34137_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)
## [1] 502527   5172
bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)

Load in second UK Biobank dataset

#read.lines("src/components/load_UKBphenotables.r") #how I made this file

bd_join4<-vroom("bd_join4.tab", delim="\t", show_col_types = FALSE)
## Warning: One or more parsing issues, see `problems()` for details
bd_join4=as_tibble(bd_join4)
dim(bd_join4)
## [1] 501560   2584

Read in extra files

source("src/components/manyColsToDummy.R")
hormonemeds<-read.csv("src/components/sexHormoneMeds.csv")
source("src/components/SSRI_Meds.R")
codes<-read.table("src/components/phenocodes.txt", header=F)
QCids<-read.table("src/components/bd_QC-keep.txt",header=TRUE)
withdrawn<-read.csv("src/components/w48818_20210809.csv", header = FALSE)
as_tibble(hormonemeds);as_tibble(ssricodes);as_tibble(codes);as_tibble(QCids);as_tibble(withdrawn)
## # A tibble: 160 × 2
##          code name                                       
##         <int> <chr>                                      
##  1 1140869270 "\"medroxyprogesterone\""                  
##  2 1140910674 "\"ethinylnortestosterone\""               
##  3 1140857656 "\"methyltestosterone product\""           
##  4 1140865136 "\"yohimbine/pemoline/methyltestosterone\""
##  5 1140868532 "\"testosterone product\""                 
##  6 1140857690 "\"oestradiol 25mg implant 36 week\""      
##  7 1140857700 "\"oestradiol 1mg/1ml injection\""         
##  8 1141165318 "\"cetrorelix\""                           
##  9 1140917306 "\"bicalutamide\""                         
## 10 1140917310 "\"casodex 50mg tablet\""                  
## # … with 150 more rows
## # A tibble: 9 × 1
##        value
##        <dbl>
## 1 1140921600
## 2 1141180212
## 3 1140879540
## 4 1140867888
## 5 1140867878
## 6 1140867876
## 7 1140867884
## 8 1140867888
## 9 1140867860
## # A tibble: 30 × 2
##       V1 V2                        
##    <int> <chr>                     
##  1 30620 Alanine_aminotransferase  
##  2 30600 Albumin                   
##  3 30610 Alkaline_phosphatase      
##  4 30630 Apolipoprotein_A          
##  5 30640 Apolipoprotein_B          
##  6 30650 Aspartate_aminotransferase
##  7 30710 C-reactive_protein        
##  8 30680 Calcium                   
##  9 30690 Cholesterol               
## 10 30700 Creatinine                
## # … with 20 more rows
## # A tibble: 356,980 × 1
##        IID
##      <int>
##  1 1000013
##  2 1000036
##  3 1000048
##  4 1000055
##  5 1000067
##  6 1000072
##  7 1000080
##  8 1000099
##  9 1000106
## 10 1000123
## # … with 356,970 more rows
## # A tibble: 68 × 1
##         V1
##      <int>
##  1 1044796
##  2 1046204
##  3 1046731
##  4 1154359
##  5 1305102
##  6 1394987
##  7 1410887
##  8 1425582
##  9 1525271
## 10 1733666
## # … with 58 more rows

Extract columns from bd dataframe

pheno<-bd%>%select(f.eid,f.21003.0.0, f.31.0.0,
                        f.189.0.0,
                   f.54.0.0, f.22000.0.0,
            f.1558.0.0,
        paste("f.", codes$V1, ".0.0", sep=""))
colnames(pheno)<-c("IID", "Age", "Sex",  
            "Townsend",
                   "Assessment_center", "Geno_batch",
            "AlcoholFreq",
            codes$V2)

pheno<-as_tibble(pheno)

EOF